{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 2.10 \u5728\u6b63\u5219\u5f0f\u4e2d\u4f7f\u7528Unicode\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u95ee\u9898\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4f60\u6b63\u5728\u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u5904\u7406\u6587\u672c\uff0c\u4f46\u662f\u5173\u6ce8\u7684\u662fUnicode\u5b57\u7b26\u5904\u7406\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u89e3\u51b3\u65b9\u6848\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u9ed8\u8ba4\u60c5\u51b5\u4e0b re \u6a21\u5757\u5df2\u7ecf\u5bf9\u4e00\u4e9bUnicode\u5b57\u7b26\u7c7b\u6709\u4e86\u57fa\u672c\u7684\u652f\u6301\u3002\n\u6bd4\u5982\uff0c \\\\d \u5df2\u7ecf\u5339\u914d\u4efb\u610f\u7684unicode\u6570\u5b57\u5b57\u7b26\u4e86\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import re\nnum = re.compile('\\d+')\n# ASCII digits\nnum.match('123')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Arabic digits\nnum.match('\\u0661\\u0662\\u0663')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5982\u679c\u4f60\u60f3\u5728\u6a21\u5f0f\u4e2d\u5305\u542b\u6307\u5b9a\u7684Unicode\u5b57\u7b26\uff0c\u4f60\u53ef\u4ee5\u4f7f\u7528Unicode\u5b57\u7b26\u5bf9\u5e94\u7684\u8f6c\u4e49\u5e8f\u5217(\u6bd4\u5982 \\uFFF \u6216\u8005 \\UFFFFFFF )\u3002\n\u6bd4\u5982\uff0c\u4e0b\u9762\u662f\u4e00\u4e2a\u5339\u914d\u51e0\u4e2a\u4e0d\u540c\u963f\u62c9\u4f2f\u7f16\u7801\u9875\u9762\u4e2d\u6240\u6709\u5b57\u7b26\u7684\u6b63\u5219\u8868\u8fbe\u5f0f\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "arabic = re.compile('[\\u0600-\\u06ff\\u0750-\\u077f\\u08a0-\\u08ff]+')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5f53\u6267\u884c\u5339\u914d\u548c\u641c\u7d22\u64cd\u4f5c\u7684\u65f6\u5019\uff0c\u6700\u597d\u662f\u5148\u6807\u51c6\u5316\u5e76\u4e14\u6e05\u7406\u6240\u6709\u6587\u672c\u4e3a\u6807\u51c6\u5316\u683c\u5f0f(\u53c2\u80032.9\u5c0f\u8282)\u3002\n\u4f46\u662f\u540c\u6837\u4e5f\u5e94\u8be5\u6ce8\u610f\u4e00\u4e9b\u7279\u6b8a\u60c5\u51b5\uff0c\u6bd4\u5982\u5728\u5ffd\u7565\u5927\u5c0f\u5199\u5339\u914d\u548c\u5927\u5c0f\u5199\u8f6c\u6362\u65f6\u7684\u884c\u4e3a\u3002"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "pat = re.compile('stra\\u00dfe', re.IGNORECASE)\ns = 'stra\u00dfe'\npat.match(s) # Matches"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "pat.match(s.upper()) # Doesn't match\ns.upper() # Case folds"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u8ba8\u8bba\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u6df7\u5408\u4f7f\u7528Unicode\u548c\u6b63\u5219\u8868\u8fbe\u5f0f\u901a\u5e38\u4f1a\u8ba9\u4f60\u6293\u72c2\u3002\n\u5982\u679c\u4f60\u771f\u7684\u6253\u7b97\u8fd9\u6837\u505a\u7684\u8bdd\uff0c\u6700\u597d\u8003\u8651\u4e0b\u5b89\u88c5\u7b2c\u4e09\u65b9\u6b63\u5219\u5f0f\u5e93\uff0c\n\u5b83\u4eec\u4f1a\u4e3aUnicode\u7684\u5927\u5c0f\u5199\u8f6c\u6362\u548c\u5176\u4ed6\u5927\u91cf\u6709\u8da3\u7279\u6027\u63d0\u4f9b\u5168\u9762\u7684\u652f\u6301\uff0c\u5305\u62ec\u6a21\u7cca\u5339\u914d\u3002"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.1"
    },
    "toc": {
      "base_numbering": 1,
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": true,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": true
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}